import jsonlines
import random
from tqdm import tqdm
import pickle
import itertools
import sys

# Module-level cache: maps an input value to the result of nlp() on it.
docmap = {}

def get_doc(docs):
    """Return ``nlp(docs)``, computing it only on the first request.

    Results are memoised in the module-level ``docmap`` dict, keyed by
    ``docs`` itself, so ``docs`` must be hashable.

    NOTE(review): ``nlp`` is not defined or imported in the visible part of
    this file; a cache miss raises ``NameError`` unless it is provided
    elsewhere -- confirm where it is supposed to come from.
    """
    if docs not in docmap:
        docmap[docs] = nlp(docs)
    return docmap[docs]

def save_maps(fname,mmap):
    """Pickle ``mmap`` into the file ``fname``.

    The file is opened in ``'wb+'`` mode, so any existing content is
    truncated. The highest pickle protocol available to the running
    interpreter is used.
    """
    with open(fname, 'wb+') as sink:
        pickle.dump(mmap, sink, protocol=pickle.HIGHEST_PROTOCOL)
        
def load_map(fname):
    """Load and return the object pickled in the file ``fname``.

    The file is opened inside a ``with`` block so the handle is closed
    deterministically, including when unpickling raises.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising;
    only call this on files from a trusted source.
    """
    with open(fname, 'rb') as handle:
        return pickle.load(handle)


# Default tokens that are never indexed (articles, question words, copulas).
stop_words_more = ["a","an","the","how","who","what","which","where","when","is","was","that","there"]
def create_norm_reverseindex(mmap,stop_words_more):
    """Build a reverse index from lower-cased tokens to the keys of ``mmap``.

    Args:
        mmap: mapping whose values are iterables of strings.
        stop_words_more: iterable of tokens that must not be indexed.

    Returns:
        A dict mapping each token to a list of ``mmap`` keys. A key is
        appended once for every string under it that contains the token, so
        the lists may hold duplicates.

    Each string is lower-cased and split on single spaces. Empty tokens
    (from an empty string or from leading/trailing/repeated spaces) are
    skipped; ``str.split(" ")`` never returns an empty list, so a length
    check on the split result cannot filter them out.
    """
    stop_words = set(stop_words_more)
    rev_map = {}
    for key, entities in tqdm(mmap.items(), ascii=True):
        for ent in entities:
            # dict.fromkeys removes repeated tokens within one string while
            # keeping first-seen order, so the index is built in a
            # reproducible order (set iteration order is not).
            for tok in dict.fromkeys(ent.lower().split(" ")):
                if not tok or tok in stop_words:
                    continue
                rev_map.setdefault(tok, []).append(key)
    return rev_map

def dedupe_map(mmap):
    """Return a new dict with duplicates removed from every value of ``mmap``.

    Args:
        mmap: mapping whose values are iterables of hashable items.

    Returns:
        A dict with the same keys; each value is a list of the distinct
        items of the original value, in order of first occurrence.

    ``dict.fromkeys`` is used rather than ``set`` so that the resulting order
    is stable between runs instead of depending on hash ordering.
    """
    return {
        key: list(dict.fromkeys(values))
        for key, values in tqdm(mmap.items(), ascii=True)
    }


def write_graph(rev_np_map_expanded,outfile):
    """Write the mapping to ``outfile`` as one ``key|value`` line per pair.

    For every key in ``rev_np_map_expanded`` and every item in its value,
    a line ``"<key>|<item>"`` followed by a newline is written. No escaping
    is applied to either side. An existing file is overwritten.
    """
    with open(outfile,"w") as sink:
        for source, targets in tqdm(rev_np_map_expanded.items(),ascii=True):
            sink.writelines(f"{source}|{target}\n" for target in targets)
                
if __name__ == '__main__':
    # Load the two pickled input maps.
    npmap = load_map("../data/triples/omcsnp_map.pickled")
    vpmap = load_map("../data/triples/omcsvp_map.pickled")

    # Merge them into one dict; when both maps share a key, npmap's entry wins.
    allmap = dict(vpmap)
    allmap.update(npmap)

    # token -> keys reverse index, de-duplicated, then dumped as an edge list.
    token_index = create_norm_reverseindex(allmap,stop_words_more)
    unique_index = dedupe_map(token_index)
    write_graph(unique_index,"graphs_omcs.csv")
    